# Fix the random number generator seed so that any step relying on random
# numbers gives the same result on every run.
set.seed(1)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.1
library(ggplot2)
library(xlsx)
library(tree)

Whole Slide SNA

Data Preprocessing

Skip Data Preprocessing

Reading in and storing datasets from file

# Object-statistics tables, one CSV file per table (Norm, FVP, GDM, PE),
# read relative to the current working directory.
d_Norm <- read.csv(file = "ObjectStatistics_ROI_Norm.csv")
d_FVP <- read.csv(file = "ObjectStatistics_ROI_FVP.csv")
d_GDM <- read.csv(file = "ObjectStatistics_ROI_GDM.csv")
d_PE <- read.csv(file = "ObjectStatistics_ROI_PE.csv")

# Combined table, taken from the first sheet of the workbook.
d_Combined <- read.xlsx(
  file = "SingleLabID_NormbyVilliArea_2.xlsx",
  sheetIndex = 1
)

Data Analysis

Skip to Summary of Norm

Skip to Summary of FVP

Skip to Summary of GDM

Skip to Summary of PE

Skip to Summary of All Groups by Slide

Skip to Summary of All Groups by Patient Average

By Patient

#' Print one box plot per measurement column, grouped by patient.
#'
#' The first column of `x` is used as the patient identifier and is converted
#' to a factor so that every patient gets its own box.
#'
#' @param x A data frame whose first column identifies the patient.
#' @param cols Integer positions of the columns to plot. Defaults to `4:24`,
#'   the range this script has always used.
#' @return `NULL`, invisibly; called for the side effect of printing plots.
Box_by_Patient <- function(x, cols = 4:24) {
  # Fail early: an out-of-range position would otherwise yield an `NA`
  # column name and a meaningless plot.
  if (!isTRUE(all(cols >= 1 & cols <= ncol(x)))) {
    stop("`cols` must index existing columns of `x` (1 to ", ncol(x), ")",
         call. = FALSE)
  }

  # factorizing by name
  patient_col <- colnames(x)[1]
  x[[1]] <- as.factor(x[[1]])

  # printing boxplots
  for (measure in colnames(x)[cols]) {
    p <- ggplot(x, aes_string(x = patient_col, y = measure)) +
      geom_boxplot() +
      labs(title = paste("Box Plot of", measure, "by Patient"),
           x = "Patient")
    print(p)
  }
  invisible(NULL)
}

Norm

# Per-patient box plots for the Norm table.
Box_by_Patient(x = d_Norm)

Back to Top

FVP

# Per-patient box plots for the FVP table.
Box_by_Patient(x = d_FVP)

Back to Top

GDM

# Per-patient box plots for the GDM table.
Box_by_Patient(x = d_GDM)

Back to Top

PE

# Per-patient box plots for the PE table.
Box_by_Patient(x = d_PE)

Back to Top

By Group

Individual Slide

# Give all four tables the column names of d_GDM so they can be stacked;
# the headers did not match because of a typo.
colnames(d_Norm) <- colnames(d_GDM)
colnames(d_PE) <- colnames(d_GDM)
colnames(d_FVP) <- colnames(d_GDM)

# Stack the tables, keeping only the first 24 columns of each: the remaining
# variables are irrelevant here and may contain NAs.
d_All <- rbind(
  d_Norm[, 1:24],
  d_GDM[, 1:24],
  d_FVP[, 1:24],
  d_PE[, 1:24]
)
#' Print one box plot per measurement column, grouped by study group.
#'
#' The grouping variable is the column named "Group". It is converted to a
#' factor by name, so the column that is converted is always the column that
#' is plotted on the x axis, whatever its position in `x`.
#'
#' @param x A data frame containing a column named "Group".
#' @param cols Integer positions of the columns to plot. Defaults to `4:24`,
#'   the range this script has always used.
#' @return `NULL`, invisibly; called for the side effect of printing plots.
Box_by_Group <- function(x, cols = 4:24) {
  if (!"Group" %in% colnames(x)) {
    stop("`x` must contain a column named \"Group\"", call. = FALSE)
  }
  # Fail early: an out-of-range position would otherwise yield an `NA`
  # column name and a meaningless plot.
  if (!isTRUE(all(cols >= 1 & cols <= ncol(x)))) {
    stop("`cols` must index existing columns of `x` (1 to ", ncol(x), ")",
         call. = FALSE)
  }

  # factorizing by group
  x[["Group"]] <- as.factor(x[["Group"]])

  # printing boxplots
  for (measure in colnames(x)[cols]) {
    p <- ggplot(x, aes_string(x = "Group", y = measure)) +
      geom_boxplot() +
      labs(title = paste("Boxplot of", measure, "by Group"),
           x = "Group")
    print(p)
  }
  invisible(NULL)
}
# Per-group box plots over the stacked slide-level table.
Box_by_Group(x = d_All)

Back to Top

Patient Average

#' Print one box plot per measurement column of the combined table, grouped
#' by study group.
#'
#' The grouping variable is the column named "Group". It is converted to a
#' factor by name; converting by position could touch a different column
#' than the one plotted on the x axis.
#'
#' @param x A data frame containing a column named "Group".
#' @param cols Integer positions of the columns to plot. Defaults to `3:40`,
#'   the range this script has always used.
#' @return `NULL`, invisibly; called for the side effect of printing plots.
Box_by_Group.2 <- function(x, cols = 3:40) {
  if (!"Group" %in% colnames(x)) {
    stop("`x` must contain a column named \"Group\"", call. = FALSE)
  }
  # Fail early: an out-of-range position would otherwise yield an `NA`
  # column name and a meaningless plot.
  if (!isTRUE(all(cols >= 1 & cols <= ncol(x)))) {
    stop("`cols` must index existing columns of `x` (1 to ", ncol(x), ")",
         call. = FALSE)
  }

  # factorizing by group
  x[["Group"]] <- as.factor(x[["Group"]])

  # printing boxplots
  for (measure in colnames(x)[cols]) {
    p <- ggplot(x, aes_string(x = "Group", y = measure)) +
      geom_boxplot() +
      labs(title = paste("Boxplot of", measure, "by Group"),
           x = "Group")
    print(p)
  }
  invisible(NULL)
}
# Per-group box plots over the combined table.
Box_by_Group.2(x = d_Combined)

## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

Back to Top

Summary by Gender

#' Print one box plot per measurement column, grouped by gender.
#'
#' The column at position `gender_col` is used as the grouping variable and
#' is converted to a factor so that every level gets its own box.
#'
#' @param x A data frame holding the measurements and the gender column.
#' @param cols Integer positions of the columns to plot. Defaults to `4:24`,
#'   the range this script has always used.
#' @param gender_col Position of the gender column. Defaults to `36`, the
#'   position this script has always used.
#' @return `NULL`, invisibly; called for the side effect of printing plots.
Box_by_Patient.G <- function(x, cols = 4:24, gender_col = 36) {
  # Fail early: an out-of-range position would otherwise yield an `NA`
  # column name and a meaningless plot.
  if (length(gender_col) != 1 ||
        !isTRUE(gender_col >= 1 && gender_col <= ncol(x))) {
    stop("`gender_col` must index one existing column of `x` (1 to ",
         ncol(x), ")", call. = FALSE)
  }
  if (!isTRUE(all(cols >= 1 & cols <= ncol(x)))) {
    stop("`cols` must index existing columns of `x` (1 to ", ncol(x), ")",
         call. = FALSE)
  }

  # factorizing by gender
  gender_name <- colnames(x)[gender_col]
  x[[gender_col]] <- as.factor(x[[gender_col]])

  # printing boxplots
  for (measure in colnames(x)[cols]) {
    p <- ggplot(x, aes_string(x = gender_name, y = measure)) +
      geom_boxplot() +
      labs(title = paste("Box Plot of", measure, "by Gender"),
           x = "Gender")
    print(p)
  }
  invisible(NULL)
}

Norm

# Per-gender box plots for the Norm table.
Box_by_Patient.G(x = d_Norm)

Back to Top

FVP

# Per-gender box plots for the FVP table.
Box_by_Patient.G(x = d_FVP)

Back to Top

GDM

# Per-gender box plots for the GDM table.
Box_by_Patient.G(x = d_GDM)

Back to Top

PE

# Per-gender box plots for the PE table.
Box_by_Patient.G(x = d_PE)

Prediction of Groups

# Fit a tree predicting Group from the other columns in positions 2 to 40 of
# the combined table, then draw and label it.
d_Combined.tree <- d_Combined[, 2:40]

# Since R 4.0.0 data frames no longer turn strings into factors by default,
# so string columns (including Group) may arrive here as character vectors,
# which tree() is not documented to accept. Convert them explicitly; columns
# that are already factors or numeric are left untouched.
d_Combined.tree[] <- lapply(
  d_Combined.tree,
  function(column) if (is.character(column)) as.factor(column) else column
)

tree <- tree(Group ~ ., data = d_Combined.tree)
plot(tree)
text(tree)